*** Appendix figures

* ---- Session setup -------------------------------------------------------
clear all
set more off
set maxvar 20000

* ---- Folders: replace these with your own replication directories --------
* Keep the trailing slash: the commands below append file names directly.
global dir_dest "/replication/Data/"
global dir_dest_figures "/replication/Figures/"
global dir_dest_tables "/replication/Tables/"

***********************************************************************************************************
// Figures A1, A4, A6.  Difference between hand-coding and NLP-coding results, by topic
***********************************************************************************************************

* Histograms of (hand-coded score minus model score), one panel per value of
* cat, drawn once for each NLP model: random forest, MLP, and CNN.
* File paths are quoted so that directories containing spaces still work.
set more off
use "${dir_dest}1_regulation_coding.dta", clear
label var cat "regulation topic"

* Random forest
hist diffrf, by(cat, graphregion(color(white))) bcolor(ltblue) barw(0.8) frac ///
     xtitle("Hand-coded score minus random forest score") legend(off)
graph export "${dir_dest_figures}random_forest_topic.png", replace

* MLP
hist diffmlp, by(cat, graphregion(color(white))) bcolor(ltblue) barw(0.8) frac ///
     xtitle("Hand-coded score minus MLP score")
graph export "${dir_dest_figures}MLP_topic.png", replace

* CNN
hist diffcnn, by(cat, graphregion(color(white))) bcolor(ltblue) barw(0.8) frac ///
     xtitle("Hand-coded score minus CNN score")
graph export "${dir_dest_figures}CNN_topic.png", replace


***********************************************************************************************************
// Figure B1.  Distribution of tariff changes and export growth across industries, 2001–2007
***********************************************************************************************************

* Load the trade-elasticity data (path quoted so directories with spaces work).
* NOTE(review): no graph commands follow this load in the current file, so
* Figure B1 is not actually produced here -- confirm whether the plotting
* code was dropped.
use "${dir_dest}7_trade_elasticity.dta", clear

***********************************************************************************************************
// Figure B2.  Industry concentration across prefectures in 2000, Herfindahl Index
***********************************************************************************************************

* Load the Herfindahl data (path quoted so directories with spaces work).
* NOTE(review): no graph commands follow this load in the current file, so
* Figure B2 is not actually produced here -- confirm whether the plotting
* code was dropped.
set more off
use "${dir_dest}9_herfindahl.dta", clear

***********************************************************************************************************
// Figure C1. The prefecture-level correlation between the change in regulation scores coded manually and coded by NLP methods (2001–2007)
***********************************************************************************************************

* Scatter of d_lrf against d_lhc for the year==2001 observations, with two
* reference lines. File paths are quoted so directories with spaces work.
set more off
use "${dir_dest}2_main_regression.dta", clear

* Report the raw correlation shown in the figure.
corr d_lrf d_lhc if year==2001

* Layer 2 regresses d_lhc on itself, which draws the 45-degree line over the
* observed x-range; layer 3 is the actual linear fit of d_lrf on d_lhc.
twoway (scatter d_lrf d_lhc if year==2001, mc(navy)) ///
       (lfit d_lhc d_lhc if year==2001, lc(navy)) ///
       (lfit d_lrf d_lhc if year==2001, lc(navy) lp(dash)), ///
       graphregion(color(white)) ///
       xtitle("Change in sinh{sup:-1} (regulation score), manual coding") ///
       ytitle("Change in sinh{sup:-1} (regulation score), RF coding") ///
       legend(order(2 "45 degree line" 3 "linear fitted line"))
graph export "${dir_dest_figures}nlp_manual_correlation.png", replace


***********************************************************************************************************
// Figure C2. Effect of changes in tariffs on exports on regulation change (1995–2001)
***********************************************************************************************************

* Scatter of d_lhc against tf_exp_wage for the year==1995 observations, with
* unweighted and population-weighted fitted lines and three labelled cities.
* File paths are quoted so directories with spaces work, and the export path
* no longer adds a second slash after ${dir_dest_figures}.
set more off
use "${dir_dest}2_main_regression.dta", clear

* City-name label variables; each is non-empty only for its own citygb code.
gen cname1="Beijing" if citygb==1101
gen cname2="Shanghai" if citygb==3101
gen cname3="Guangzhou" if citygb==4401

* Sample restrictions.
* NOTE(review): .0548622 is a hard-coded cutoff on lambda13 -- confirm its source.
keep if d_l_gdppct!=.
keep if lambda13<=.0548622
keep if year==1995

* Unweighted and weighted regressions behind the two fitted lines.
reg d_lhc tf_exp_wage if year==1995, cluster(provgb)
reg d_lhc tf_exp_wage if year==1995 [aw=totpop_st], cluster(provgb)

* The slope/s.e. annotations below are typed by hand; update them if the
* regressions above change.
twoway (scatter d_lhc tf_exp_wage if year==1995,  msymbol(smdiamond) mcolor(navy) text(-0.5 -0.9 "Slope = –0.01 (s.e. = 0.04)", color(navy)  place(se) size(small)) text(0.5 -2 "Slope = 0.04 (s.e. = 0.07)", color(red) place(s) size(small))) ///
       (scatter d_lhc tf_exp_wage if year==1995 & cname1!="", mlabel(cname1) mlabcolor(emidblue) msymbol(circle) mcolor(emidblue) mlabp(6)) ///
       (scatter d_lhc tf_exp_wage if year==1995 & cname2!="", mlabel(cname2) mlabcolor(emidblue) msymbol(circle) mcolor(emidblue) mlabp(11)) ///
       (scatter d_lhc tf_exp_wage if year==1995 & cname3!="", mlabel(cname3) mlabcolor(emidblue) msymbol(circle) mcolor(emidblue) mlabp(2)) ///
       (lfit d_lhc tf_exp_wage if year==1995, lcolor(navy) lpattern(dash)) ///
       (lfit d_lhc tf_exp_wage if year==1995 [aw=totpop_st], lcolor(red) lpattern(shortdash_dot)) ///
       , graphregion(color(white)) xtitle("Changes in tariffs on exports, 1995—2001") ///
       ytitle("Changes in sinh{sup:-1} (regulation score), 1995—2001") ///
       legend(order(5 "Unweighted" 6 "Weighted by 1995 population"))
graph export "${dir_dest_figures}tariff_regulation_pre.png", replace


***********************************************************************************************************
// Figure C3. Industry-level effect of changes in tariffs on exports on regulation change (2001–2007)
***********************************************************************************************************

* Industry-level plot: d_lhc against the first-stage prediction of
* tf_exp_wage, one labelled marker per SIC code plus a weighted fitted line.
* Changes from the earlier version of this section:
*   - file paths are quoted (directories with spaces) and the export path no
*     longer adds a second slash after ${dir_dest_figures};
*   - the unused beta_t row total (created and immediately dropped) is gone;
*   - the 23 hand-written scatter layers are built in a loop, in the same order.
set more off
use "${dir_dest}2_main_regression.dta", clear
keep if year==2001
gen cnst=1
local controls tf_imp_wage tf_m_wage lhc_st
local balances cnst

* Regression on the pre-aggregation data, shown for reference.
reg d_lhc tf_exp_wage tf_imp_wage tf_m_wage lhc_st

* ssaggregate is a user-written command, not part of official Stata.
* NOTE(review): it is presumed to leave one row per SIC (the 1:1 merge on SIC
* below relies on that) and to create the weight s_n used later -- confirm.
ssaggregate `balances' d_lhc tf_exp_wage, n(SIC) t(year) s(beta) controls("`controls'")

* Bring in the industry-level tariff data; keep plain matches only.
merge 1:1 SIC using "${dir_dest}7_trade_elasticity.dta", update
keep if _merge == 3
drop _merge
drop imp2001 d_l_imp2001 d_t2001 d_l_imp1995 d_t1995

* IV estimate, then the first stage and its fitted values (a), then the
* regression of d_lhc on those fitted values.
ivreg d_lhc (tf_exp_wage=d_tariff_2001) [aw=s_n]
reg tf_exp_wage d_tariff_2001 [aw=s_n]
predict a
reg d_lhc a [aw=s_n]

* One scatter layer per SIC code. The order (300 placed between 32 and 35)
* is kept because twoway assigns plot styles by layer position.
local sic_layers
foreach s of numlist 10 12 13 14 20/32 300 35/39 {
    local sic_layers `sic_layers' (scatter d_lhc a if SIC==`s', mlabel(SIC) mlabp(12))
}

twoway `sic_layers' ///
       (lfit d_lhc a [aw=s_n]) ///
       , graphregion(color(white)) xtitle("Predicted industry-level changes in tariffs on exports") ///
       ytitle("Predicted changes in sinh{sup:-1} (regulation score) ") legend(off)

graph export "${dir_dest_figures}industry_plot.png", replace



***********************************************************************************************************
// Figure C4. Coefficients from the main regression by adding industrial-composition controls one by one
***********************************************************************************************************

* Coefficient plot: the tf_exp_wage coefficient and its 90% interval from the
* main regression, re-estimated once per industry with lambda<SIC> added.
* Changes from the earlier version of this section:
*   - file paths are quoted so directories with spaces work;
*   - the stacking step starts from an empty dataset, so result10 is appended
*     once instead of being loaded, appended again and then removed with
*     -duplicates drop-.
* outreg2 and parmest are user-written commands, not part of official Stata.
set more off
use "${dir_dest}2_main_regression.dta", clear
keep if year==2001
keep if lambda13<=.0548622
keep if d_l_gdppct!=.

global SICLIST  10 12 13 14 20 21 22 23 24 25 26 27 28 29 30 31 32 35 36 37 38 39 300

* Baseline specification (first column of ind.tex).
quietly reg d_lhc tf_exp_wage tf_imp_wage tf_m_wage lhc_st if year==2001, cluster(provgb)
outreg2 using "${dir_dest_tables}ind.tex", replace dec(2)

* Add one lambda<SIC> control at a time; store each set of estimates.
foreach SIC of global SICLIST {
    quietly reg d_lhc tf_exp_wage tf_imp_wage tf_m_wage lhc_st lambda`SIC' if year==2001, cluster(provgb)
    outreg2 using "${dir_dest_tables}ind.tex", append dec(2)
    parmest, saving("${dir_dest}result`SIC'", replace) level(90)
}

* Stack the saved estimates, tagging each file's rows with its SIC code.
clear
gen ind = .
foreach SIC of global SICLIST {
    append using "${dir_dest}result`SIC'.dta"
    replace ind = `SIC' if missing(ind)
}
keep if parm=="tf_exp_wage"

* n is the plotting position: rank of the SIC code in ascending order, so
* label k below belongs to the k-th smallest code in SICLIST (300 is last).
sort ind
gen n=_n
label define ind ///
1	"Metallic ores" ///
2	"Coal" ///
3	"Petroleum" ///
4	"Nonmetallic mineral" ///
5	"Food" ///
6	"Tobacco" ///
7	"Textile" ///
8	"Apparel" ///
9	"Lumber" ///
10	"Furniture" ///
11	"Paper" ///
12	"Printing" ///
13	"Chemicals" ///
14	"Petroleum refining" ///
15	"Rubber" ///
16	"Leather" ///
17	"Stone" ///
18	"Machinery" ///
19	"Electrical machinery" ///
20	"Transp. equipment" ///
21	"Scientific instruments" ///
22	"Miscellaneous" ///
23	"Metal" 

label val n ind

* NOTE(review): yline(-1.59) is a hard-coded reference value, presumably the
* baseline tf_exp_wage coefficient -- confirm and update if estimates change.
twoway rcap min90 max90 n, lcolor(black) lwidth(thin) ///
       xtitle("Industry (2-digit SIC code)") ///
       ytitle("Coefficient estimate for export tariff, 90% CI") ///
       graphregion(color(white)) xlabel(1(1)23, valuelabel angle(45)) yline(-1.59)
graph export "${dir_dest_figures}robust_ind.png", replace
